Error (delta) for blocks.0.attn.hook_result attribution: tensor([-0.0575, -0.6158, -0.4065, 0.2226, 0.0106, 4.4232, 0.3767, 0.3323,
5.5475, -0.2457], device='cuda:0')
torch.Size([10, 21, 12, 768])
Error (delta) for blocks.0.mlp.hook_post attribution: tensor([0.1597, 0.2323, 0.1091, 0.2517, 0.2843, 0.1124, 0.4216, 0.1101, 0.0251,
0.3263], device='cuda:0')
Error (delta) for blocks.1.attn.hook_result attribution: tensor([ 1.0394e-06, -4.1723e-07, -5.4389e-07, 1.1660e-06, -6.1095e-06,
-1.1623e-05, 2.5164e-06, 4.2468e-07, 1.8358e-05, -2.5705e-07],
device='cuda:0')
torch.Size([10, 21, 12, 768])
Error (delta) for blocks.1.mlp.hook_post attribution: tensor([ 2.0862e-07, -2.2203e-06, -1.0133e-06, -8.9407e-08, 2.9951e-06,
7.8678e-06, 3.5763e-07, 5.7369e-07, -2.8610e-06, 1.0058e-06],
device='cuda:0')
Error (delta) for blocks.2.attn.hook_result attribution: tensor([ 9.6858e-08, -7.4506e-08, -3.2578e-06, 6.2585e-07, 3.2708e-06,
2.3127e-05, -1.5749e-06, -3.0920e-07, 1.2815e-05, 8.0466e-07],
device='cuda:0')
torch.Size([10, 21, 12, 768])
Error (delta) for blocks.2.mlp.hook_post attribution: tensor([-5.8860e-07, -1.1176e-06, -9.2387e-07, -3.0100e-06, 3.2336e-06,
-8.5831e-06, -6.7055e-07, -1.2219e-06, 1.9073e-06, 2.7269e-06],
device='cuda:0')
Error (delta) for blocks.3.attn.hook_result attribution: tensor([-6.2585e-07, 1.3486e-06, -7.9675e-07, 4.6380e-07, -2.4587e-06,
-1.4305e-06, 1.2107e-07, 2.6450e-07, 1.1861e-05, -3.1367e-06],
device='cuda:0')
torch.Size([10, 21, 12, 768])
Error (delta) for blocks.3.mlp.hook_post attribution: tensor([-1.1474e-06, 6.7055e-07, 1.6391e-07, -9.5367e-07, 1.3560e-06,
1.9073e-06, 1.5944e-06, -1.3635e-06, -8.1062e-06, -1.6950e-07],
device='cuda:0')
Error (delta) for blocks.4.attn.hook_result attribution: tensor([-6.8545e-07, -7.5996e-07, 1.9409e-06, -1.0431e-07, -5.1409e-07,
-6.7353e-06, 1.0580e-06, -5.2899e-07, 2.8610e-06, 1.4901e-08],
device='cuda:0')
torch.Size([10, 21, 12, 768])
Error (delta) for blocks.4.mlp.hook_post attribution: tensor([ 5.9605e-07, -1.3858e-06, -1.2815e-06, 1.7881e-07, 1.6540e-06,
1.3769e-05, -1.5050e-06, -6.3330e-07, 1.4931e-05, -4.0233e-07],
device='cuda:0')
Error (delta) for blocks.5.attn.hook_result attribution: tensor([-5.2899e-07, -1.7509e-07, -1.2666e-07, 1.0096e-06, 4.7013e-06,
4.1723e-07, -1.0058e-06, 1.5274e-07, -2.9355e-06, -1.1399e-06],
device='cuda:0')
torch.Size([10, 21, 12, 768])
Error (delta) for blocks.5.mlp.hook_post attribution: tensor([-4.3213e-07, 1.4305e-06, -8.9407e-07, -1.8030e-06, -2.5332e-06,
5.7220e-06, -7.1526e-07, 0.0000e+00, -1.1921e-05, -2.0564e-06],
device='cuda:0')
Error (delta) for blocks.6.attn.hook_result attribution: tensor([-1.5609e-06, 6.4820e-07, 1.4901e-06, -1.6540e-06, 1.4901e-07,
3.9339e-06, 1.8924e-06, -4.2841e-08, -8.4639e-06, 2.1867e-06],
device='cuda:0')
torch.Size([10, 21, 12, 768])
Error (delta) for blocks.6.mlp.hook_post attribution: tensor([-1.1623e-06, -7.4506e-07, 4.7684e-07, 7.7486e-07, 7.4506e-07,
8.8215e-06, -3.2037e-07, -4.4703e-07, -1.4305e-06, 4.0233e-07],
device='cuda:0')
Error (delta) for blocks.7.attn.hook_result attribution: tensor([ 7.0781e-08, -1.4380e-06, 7.7486e-07, -7.8976e-07, -1.5087e-06,
3.7551e-06, -3.8743e-07, 8.7917e-07, 3.5167e-06, -2.3097e-07],
device='cuda:0')
torch.Size([10, 21, 12, 768])
Error (delta) for blocks.7.mlp.hook_post attribution: tensor([-5.2154e-08, 1.2666e-06, 1.4305e-06, -5.3644e-07, 2.0862e-07,
9.3579e-06, 7.0781e-07, -1.9372e-06, -1.6689e-05, -1.1623e-06],
device='cuda:0')
Error (delta) for blocks.8.attn.hook_result attribution: tensor([-1.2293e-07, 8.9407e-08, 1.6168e-06, -1.6615e-06, 4.1947e-06,
-1.3828e-05, 7.3388e-07, 2.3097e-07, -2.4438e-06, 4.7684e-07],
device='cuda:0')
torch.Size([10, 21, 12, 768])
Error (delta) for blocks.8.mlp.hook_post attribution: tensor([-7.9721e-07, -7.4506e-08, -2.3246e-06, 5.9605e-08, 4.3958e-06,
1.6451e-05, 5.0664e-07, 5.9605e-07, -1.7762e-05, 1.1921e-07],
device='cuda:0')
Error (delta) for blocks.9.attn.hook_result attribution: tensor([-4.0978e-08, 1.4901e-07, 8.4937e-07, -8.9407e-08, -1.7136e-06,
-1.1921e-06, 3.7253e-09, 6.4448e-07, 4.8578e-06, -1.2480e-07],
device='cuda:0')
torch.Size([10, 21, 12, 768])
Error (delta) for blocks.9.mlp.hook_post attribution: tensor([ 1.4007e-06, 7.4506e-07, 1.3262e-06, 1.0282e-06, -1.2815e-06,
6.9737e-06, -5.3644e-07, -6.4820e-07, -1.1832e-05, 5.0664e-07],
device='cuda:0')
Error (delta) for blocks.10.attn.hook_result attribution: tensor([ 1.6391e-06, -1.1325e-06, 1.3411e-06, 1.3262e-06, 3.5316e-06,
-1.6451e-05, 1.7509e-07, -8.1956e-08, -1.6332e-05, 2.6487e-06],
device='cuda:0')
torch.Size([10, 21, 12, 768])
Error (delta) for blocks.10.mlp.hook_post attribution: tensor([ 1.3560e-06, -7.7486e-07, 1.8477e-06, -7.1526e-07, -2.2054e-06,
1.8477e-06, -8.3447e-07, 5.9605e-08, 1.3590e-05, 1.1921e-06],
device='cuda:0')
Error (delta) for blocks.11.attn.hook_result attribution: tensor([ 7.6741e-07, -1.7881e-07, 1.6540e-06, 2.3842e-07, 2.4885e-06,
1.3351e-05, 2.9802e-07, 1.3709e-06, 8.1062e-06, -1.4305e-06],
device='cuda:0')
torch.Size([10, 21, 12, 768])
Error (delta) for blocks.11.mlp.hook_post attribution: tensor([ 5.9605e-07, -1.4305e-06, -8.9407e-07, 5.9605e-07, -1.9073e-06,
8.6427e-06, 0.0000e+00, -3.5763e-07, 4.5002e-06, 2.2352e-06],
device='cuda:0')